************************************************************
*****     			CLEAN REGISTRY DATA			       *****
************************************************************

/* CONTENTS:
1. Create dataset with health shock sample
2. Clean parental labor data
3. Clean parental mental health data
4. Clean parental marital status data 
5. Clean parental education data
6. Clean parental leave data
7. Matched sample for Mutual Shocks (Hospitalizations)
8. Create dataset with all hospitalizations for children
*/

*********************************************  
*** 1.CREATE HEALTH SHOCK SAMPLE
*********************************************
{
  
* Load the NPR somatic file (2008-2014) and discard rows without a person id
use "S:\Project\DemoSos2\data\NPR\som2008_2014.dta", clear
drop if lopenr == ""

* Restrict to patients aged 0-18 (rows with missing age are removed as well)
drop if alder > 18
tabulate alder
codebook lopenr

rename (innUke aar) (Week Year)

* Build the admission date from the M and D variables used below
* NOTE(review): M and D are presumably created by GenNPRdate.do - confirm
do GenNPRdate.do
generate InnDato = mdy(M, D, Year)
format InnDato %td
label variable InnDato "Date hosptial admission"
codebook lopenr

* Rows without a valid admission date cannot be used
drop if InnDato == .
generate Month = month(InnDato)
drop D M
codebook lopenr

* Keep the analysis variables, put them in a fixed order and drop exact duplicates
keep lopenr Year Week alder kjonn Behandlingsniva3 innmateHast hoved1 liggetid InnDato Month
order lopenr InnDato Year Month Week alder kjonn Behandlingsniva3 innmateHast hoved1 liggetid
duplicates drop

* Clean data
* Some have the same admission (same person and admission date) over several rows ->
	** Sum the no. of nights in hospital and store the total on each of those rows
bysort lopenr InnDato: egen sum_liggetid = total(liggetid)
bysort lopenr InnDato: replace liggetid = sum_liggetid if _N > 1
drop sum_liggetid
duplicates drop

	** Keep one diagnosis per person, date, care level and urgency.
	** The remaining variables are listed as tie-breakers so that the row that is
	** kept does not depend on the random order Stata gives to tied observations.
bysort lopenr InnDato Behandlingsniva3 innmateHast (hoved1 Year Week alder kjonn): keep if _n == 1

	** Drop person-days that have both an acute (innmateHast 1) and a planned
	** (innmateHast 2) contact on the same date
gen acute = (innmateHast == 1)
gen planned = (innmateHast == 2)

bysort lopenr InnDato: egen num_acute = max(acute)
bysort lopenr InnDato: egen num_planned = max(planned)

gen drop_flag = (num_acute == 1 & num_planned == 1)
drop if drop_flag
drop acute planned num_acute num_planned drop_flag

* Some have an acute admission and acute outpatient consultation at the same date, keep the admission
	** On dates with several rows only the row with Behandlingsniva3 1 and innmateHast 1 is kept.
	** NOTE(review): dates with several rows but no such row are removed completely, so those
	** contacts are invisible to the later check for visits in the year before the shock - confirm intent.
bysort lopenr InnDato: gen N = _N
keep if N == 1 | ( N > 1 & Behandlingsniva3 == 1 & innmateHast == 1)
drop N

* Identify and keep acute admissions with overnight stay, and ensure no visit 365 days before
* A health shock is a row with innmateHast 1, Behandlingsniva3 1 and at least one night.
* Missing liggetid is excluded explicitly: in Stata a missing value compares as larger than
* any number, so liggetid>0 alone would treat stays of unknown length as overnight stays.
gen healthshock=(innmateHast == 1 & Behandlingsniva3==1 & liggetid>0 & !missing(liggetid))
bys lopenr: egen HS=max(healthshock)
codebook lopenr if HS==0
* Persons who never have a health shock leave the sample
drop if HS==0
drop HS

* Generate variable for the first health shock
* hs1 numbers the health-shock rows of a person by admission date; it is missing on other rows
bys lopenr healthshock (InnDato): gen hs1=_n
replace hs1=. if healthshock!=1
tab hs1, m

* Drop those who have a hospital contact at all in the year before the HS
* NPRsPre flags the first health shock when the preceding row of the same person is at most
* 365 days earlier; PRE spreads that flag to all rows of the person.
* NOTE(review): the input file starts in 2008, so shocks early in the period presumably
* cannot be checked for a full year - confirm.
sort lopenr InnDato
gen NPRsPre = (lopenr == lopenr[_n-1] & InnDato - InnDato[_n-1] <= 365 & hs1== 1)
tab NPRsPre
bys lopenr: egen PRE=max(NPRsPre)
codebook lopenr if PRE == 0
drop if PRE==1
codebook lopenr

* Keep only the first health shock per individual
keep if hs1 == 1
ren Year Year_HS
tab Year_HS

* Keep those with the first admission between ages 6-18
tab alder
keep if alder > 5
codebook lopenr
tab alder
sum liggetid

* Drop all diagnoses related to birth and pregnancy (ICD-10 chapters O and P)
* diagnose is the first character of the main diagnosis code; Diagnose is its encoded version.
gen diagnose = (substr(hoved1, 1, 1))
encode diagnose, gen(Diagnose)
* Select on the letter itself. The numeric codes produced by encode depend on which letters
* occur in the data, so 15 and 16 only mean O and P when every earlier letter is present.
drop if inlist(upper(diagnose), "O", "P")

* Keep relevant variables and rename
keep lopenr InnDato Year_HS alder kjonn liggetid Diagnose hoved1
ren InnDato first_inphospital

* Year of first hospital admission
g yearinp=year(first_inphospital)

codebook lopenr
save "$processed_data\sample_healthshock.dta", replace

** BACKGROUND VARIABLES CHILDREN
* Attach the fixed demographic variables and keep children found in both files
use "$processed_data\sample_healthshock.dta", clear
merge 1:1 lopenr using "S:\Project\DemoSos2\data\Befolkn\Demogr\fastevar"
keep if _merge == 3

* Indicator that is 1 when invkat equals "A"
generate NORborn = (invkat == "A")

rename Fodtaar child_b_year
rename alder age_firstinphospital
label variable age_firstinphospital "Age at event time"

* Recode sex to 0/1, label it, and derive the male indicator from it
rename kjonn sex
recode sex 1=0 2=1
label define sex_ 0 "Male" 1 "Female"
label values sex sex_

generate male = (sex == 0)
label variable male "Male"

* Remove variables that are not needed further on
drop sex kjoenn fodeland landbak3gen invkat fodtmnd forstdato _merge
save "$processed_data\sample_healthshock.dta", replace
 
* Mortality children
* Step 1: look up the sample children in the death file and store year of death
use "$processed_data\sample_healthshock.dta", clear
keep lopenr
merge 1:1 lopenr using "S:\Project\DemoSos2\data\DAR\DAR.dta"
keep if _merge == 3
drop _merge
generate dead = 1
rename dodsaar deathyear
keep lopenr dead deathyear
save "$processed_data\DAR_temp.dta", replace

* Step 2: add the death information to the sample; children not found get dead = 0
use "$processed_data\sample_healthshock.dta", clear
merge 1:1 lopenr using "$processed_data\DAR_temp.dta"
drop _merge

replace dead = 0 if dead == .
tabulate dead

save "$processed_data\sample_healthshock.dta", replace
erase "$processed_data\DAR_temp.dta"
  
** BACKGROUND VARIABLES PARENTS
* Children with neither a mother id nor a father id are removed
use "$processed_data\sample_healthshock.dta", clear
drop if lopenr_mor == "" & lopenr_far == ""
save "$processed_data\sample_healthshock.dta", replace

* MOTHERS *
use "$processed_data\sample_healthshock.dta", clear
codebook lopenr
drop if lopenr_mor == ""
keep lopenr_mor Year_HS
ren lopenr_mor lopenr
* One row per mother. Sorting on Year_HS inside the group makes the kept row the earliest
* health-shock year, instead of an arbitrary one when a mother has several children in the
* sample. This matches the later step that keeps the first child per mother.
bys lopenr (Year_HS): keep if _n==1
merge 1:1 lopenr using "S:\Project\DemoSos2\data\Befolkn\Demogr\fastevar.dta"
keep if _merge==3
drop _merge

* Birth year, and marital status and immigrant status in year t-2
keep lopenr invkat Fodtaar Year_HS
gen Year_HS_minus2=Year_HS-2
merge 1:m lopenr using "S:\Project\DemoSos2\data\Befolkn\Demogr\sivilstand.dta"
drop if _merge==2
sort lopenr aar

gen married_mom=(aar == Year_HS_minus2 & sivilstand == 2) if sivilstand!=.
label var married_mom "Married"
gen unmarried_mom=(aar == Year_HS_minus2 & sivilstand == 1) if sivilstand!=.
label var unmarried_mom "Unmarried"
* Parentheses around the two codes: & binds tighter than |, so without them
* sivilstand 5 would be flagged in every year, not only in year t-2
gen divorced_mom=(aar == Year_HS_minus2 & (sivilstand == 4 | sivilstand == 5)) if sivilstand!=.
label var divorced_mom "Divorced"

* Only the row for year t-2 is kept; mothers without a row for that year drop out here
bys lopenr: keep if aar == Year_HS_minus2
codebook lopenr


gen NORborn_mom = (invkat=="A")
label var NORborn_mom "Norwegian"
drop invkat
ren Fodtaar mother_b_year
ren lopenr lopenr_mor
keep lopenr_mor mother_b_year married_mom unmarried_mom divorced_mom NORborn_mom
save "$processed_data\temp_mom.dta", replace

* Attach the mother variables; only children whose mother was found are kept
use "$processed_data\sample_healthshock.dta", clear
merge m:1 lopenr_mor using "$processed_data\temp_mom.dta"
drop if lopenr_mor ==""
keep if _merge == 3
drop _merge

** Age of mother at birth
gen mother_age = child_b_year - mother_b_year
label var mother_age "Age at birth"


* Age of mother in hospital
gen age_mother_inp=yearinp - mother_b_year
label var age_mother_inp "Age at admission"

save "$processed_data\sample_healthshock.dta", replace
erase "$processed_data\temp_mom.dta"

* FATHERS *
use "$processed_data\sample_healthshock.dta", clear
keep lopenr_far Year_HS
ren lopenr_far lopenr
drop if lopenr==""
* One row per father. Sorting on Year_HS inside the group makes the kept row the earliest
* health-shock year, instead of an arbitrary one when a father has several children in the sample.
bys lopenr (Year_HS): keep if _n==1
merge 1:1 lopenr using "S:\Project\DemoSos2\data\Befolkn\Demogr\fastevar.dta"
drop if _merge==2
drop _merge

* Birth year, and marital status and immigrant status in year t-2
keep lopenr invkat Fodtaar Year_HS
gen Year_HS_minus2=Year_HS-2
merge 1:m lopenr using "S:\Project\DemoSos2\data\Befolkn\Demogr\sivilstand.dta"
drop if _merge==2
sort lopenr aar

gen married_dad=(aar == Year_HS_minus2 & sivilstand == 2) if sivilstand!=.
label var married_dad "Married"
gen unmarried_dad=(aar == Year_HS_minus2 & sivilstand == 1) if sivilstand!=.
label var unmarried_dad "Unmarried"
* Only the row for year t-2 is kept; fathers without a row for that year drop out of the
* temporary file and get missing father variables after the merge below
bys lopenr: keep if aar == Year_HS_minus2
codebook lopenr

gen NORborn_dad = (invkat=="A")
label var NORborn_dad "Norwegian"
drop invkat
ren Fodtaar father_b_year
ren lopenr lopenr_far
keep lopenr_far father_b_year married_dad unmarried_dad NORborn_dad
save "$processed_data\temp_dad.dta", replace

* Attach the father variables; all children are kept, matched or not
use "$processed_data\sample_healthshock.dta", clear
merge m:1 lopenr_far using "$processed_data\temp_dad.dta"
drop _merge

* Age of father in hospital
gen age_father_inp=yearinp - father_b_year
label var age_father_inp "Age at admission"

save "$processed_data\sample_healthshock.dta", replace
erase "$processed_data\temp_dad.dta"
  
** KEEP FIRST CHILD THAT HAS A HEALTH SHOCK IN EACH FAMILY
* Children are numbered within mother by health-shock year. Admission date and child id are
* added as tie-breakers so that the kept child is the same on every run, also when two
* children of one mother have their shock in the same year.
use "$processed_data\sample_healthshock.dta", clear
bys lopenr_mor (Year_HS first_inphospital lopenr): gen n=_n
order lopenr_mor Year_HS n
tab n
keep if n==1
drop n
save "$processed_data\sample_healthshock.dta", replace
  
** GENERATE DIAGNOSIS GROUPS
* Generate high-level subgroups from the first two characters of the main diagnosis code.
* The groups follow the ICD-10 chapters. Two chapters share a first letter and are split on
* the second character:
*   D00-D48 neoplasms (with C)    D50-D89 blood
*   H00-H59 eye                   H60-H95 ear
* The prefixes D0 and H0 are included in the first group of each pair; leaving them out
* would file in-situ neoplasms (D00-D09) under Blood and H00-H09 under Ear.
use "$processed_data\sample_healthshock.dta", clear
g grouponly = substr(hoved1,1,2)
g groups = 1 if inlist(substr(grouponly,1,1), "A", "B")
replace groups = 2 if substr(grouponly,1,1) == "C"
replace groups = 2 if inlist(grouponly, "D0", "D1", "D2", "D3", "D4")
replace groups = 3 if substr(grouponly,1,1) == "D" & groups != 2
replace groups = 4 if substr(grouponly,1,1) == "E"
replace groups = 5 if substr(grouponly,1,1) == "F"
replace groups = 6 if substr(grouponly,1,1) == "G"
replace groups = 7 if inlist(grouponly, "H0", "H1", "H2", "H3", "H4", "H5")
replace groups = 8 if substr(grouponly,1,1) == "H" & groups != 7
replace groups = 9 if substr(grouponly,1,1) == "I"
replace groups = 10 if substr(grouponly,1,1) == "J"
replace groups = 11 if substr(grouponly,1,1) == "K"
replace groups = 12 if substr(grouponly,1,1) == "L"
replace groups = 13 if substr(grouponly,1,1) == "M"
replace groups = 14 if substr(grouponly,1,1) == "N"
replace groups = 15 if substr(grouponly,1,1) == "O"
replace groups = 17 if substr(grouponly,1,1) == "Q"
replace groups = 18 if substr(grouponly,1,1) == "R"
replace groups = 19 if inlist(substr(grouponly,1,1), "S", "T")
replace groups = 21 if substr(grouponly,1,1) == "Z"
* Note: group 20 only one observation (it is left without a group)
label define icd10 1 "Infections" 2 "Neoplasms" 3 "Blood" ///
4 "Endocrine" 5 "Mental" 6 "Nervous" 7 "Eye" 8 "Ear" 9 "Circulatory" ///
10 "Respiratory" 11 "Digestive" 12 "Skin" 13 "Muscoloskeletal" ///
14 "Genitourinary" 17 "Congenital" ///
18 "Symptoms" 19 "Injury" 21 "Factors", replace
label val groups icd10
tab groups,m
save "$processed_data\sample_healthshock.dta", replace
}
*********************************************  
*** 2.CLEAN LABOR DATA
*********************************************
{
* Stack the yearly income files for 2000-2014 into one long file
use "S:\Project\DemoSos2\data\Inntekt\Inntekt2000.dta", clear
forvalues k = 2001/2014 {
	append using "S:\Project\DemoSos2\data\Inntekt\Inntekt`k'.dta"
}
save "$processed_data/Income_employment_2000_2014.dta", replace

use "$processed_data/Income_employment_2000_2014.dta", clear
keep lopenr aar wyrkinnt wsaminnt overfor wkapinnt
rename aar Year

* Yearly divisors used to express amounts in EUROS (one local per year)
local fx2000 8.1109
local fx2001 8.0492
local fx2002 7.5073
local fx2003 8.0039
local fx2004 8.3715
local fx2005 8.0073
local fx2006 8.0510
local fx2007 8.0153
local fx2008 8.2194
local fx2009 8.7285
local fx2010 8.0068
local fx2011 7.7926
local fx2012 7.4744
local fx2013 7.8087
local fx2014 8.3534

* Work income: negative and missing amounts are set to zero
replace wyrkinnt = 0 if wyrkinnt < 0 | wyrkinnt == .
summarize wyrkinnt
generate income = .

* Transfers: negative and missing amounts are set to zero
replace overfor = 0 if overfor < 0 | overfor == .
summarize overfor
generate transfers = .

* Total income with capital income removed; only missing values are set to zero here
generate nv = wsaminnt - wkapinnt
replace nv = 0 if nv == .
generate totalincome = 0

* Convert the three amounts year by year with the divisor of that year
forvalues y = 2000/2014 {
	replace income = wyrkinnt/`fx`y'' if Year == `y'
	replace transfers = overfor/`fx`y'' if Year == `y'
	replace totalincome = nv/`fx`y'' if Year == `y'
}

* Employed: any positive work income
generate employed = (wyrkinnt > 0)

* Dropping variables
drop overfor wyrkinnt wsaminnt nv wkapinnt

* Dataset in wide format (one row per person, one set of columns per year)
reshape wide income transfers totalincome employed , i(lopenr) j(Year)
save "$processed_data/Income_employment_2000_2014_wide.dta", replace

	* Mother: same file keyed on the mother id
use "$processed_data/Income_employment_2000_2014_wide.dta", clear
rename lopenr lopenr_mor
save "$processed_data/Income_mom.dta", replace

	* Father: same file keyed on the father id
use "$processed_data/Income_employment_2000_2014_wide.dta", clear
rename lopenr lopenr_far
save "$processed_data/Income_dad.dta", replace

* Remove the intermediate files
erase "$processed_data\Income_employment_2000_2014.dta"
erase "$processed_data\Income_employment_2000_2014_wide.dta"

}
*********************************************  
*** 3.CLEAN MENTAL HEALTH
*********************************************
{
* Build one file per year with the number of GP consultations per person that carry
* a diagnosis matching "P"
forvalues i = 2006 / 2014 {
	use "S:\Project\DemoSos2\data\KUHR\KUHR`i'", clear
	drop if lopenr == ""

	** Keep GP cons. and ER visits
	keep if inlist(TypePraksis, "Fastlege", "Fastlønnet", "Legevakt", "Legevakt kommunal")
	** Keep consultations
	keep if regexm(Takst, "2ADx1") | regexm(Takst, "2AKx1")

	** GP visits related to mental health diagnoses; missing for the ER rows
	** NOTE(review): the pattern matches a P anywhere in Diagnose - confirm this is intended
	generate GPP = regexm(Diagnose, "P") if regexm(TypePraksis, "Fastlege") | regexm(TypePraksis, "Fastlønnet")
	bysort lopenr: egen n_diag_year = total(GPP)

	** One row per person with the yearly count
	keep lopenr aar n_diag_year
	bysort lopenr: keep if _n == 1
	rename aar Year
	compress
	save "$processed_data/KUHRdata_`i'.dta", replace
}

* Stack the yearly files
use "$processed_data/KUHRdata_2006.dta", clear
forvalues i = 2007 / 2014 {
	append using "$processed_data/KUHRdata_`i'.dta"
}

save "$processed_data/KUHRdata_year.dta", replace

* Remove the yearly files
forvalues i = 2006 / 2014 {
	erase "$processed_data/KUHRdata_`i'.dta"
}

 
* Dataset in wide format: one row per person, one count column per year
use "$processed_data/KUHRdata_year.dta", clear
reshape wide n_diag_year, i(lopenr) j(Year)
* Years in which a person has no row get a zero count.
* foreach replaces the obsolete, undocumented "for var" command.
foreach v of varlist n_diag_year* {
	replace `v' = 0 if `v' == .
}
save "$processed_data/Mental_health_visits_2006_2014_wide.dta", replace

	* Mother: same file keyed on the mother id
use "$processed_data/Mental_health_visits_2006_2014_wide.dta", clear
ren lopenr lopenr_mor
save "$processed_data/Mental_health_mom.dta", replace

	* Father: same file keyed on the father id
use "$processed_data/Mental_health_visits_2006_2014_wide.dta", clear
ren lopenr lopenr_far
save "$processed_data/Mental_health_dad.dta", replace

* Remove the intermediate files
erase "$processed_data\KUHRdata_year.dta"
erase "$processed_data\Mental_health_visits_2006_2014_wide.dta"

}
*********************************************  
*** 4.CLEAN PARENTAL MARITAL STATUS
*********************************************
{
* Yearly marital status from 2000 onwards as three indicators, reshaped to wide format
use "S:\Project\DemoSos2\data\Befolkn\Demogr\sivilstand.dta", clear
keep if aar > 1999

* Indicators are left missing when sivilstand is missing
generate married = (sivilstand == 2) if sivilstand != .
generate unmarried = (sivilstand == 1) if sivilstand != .
generate divorced = inlist(sivilstand, 4, 5) if sivilstand != .

keep lopenr aar married unmarried divorced

reshape wide married unmarried divorced, i(lopenr) j(aar)
save "$processed_data/Marital_status_wide.dta", replace

	* 4.1. Mother: same file keyed on the mother id
use "$processed_data/Marital_status_wide.dta", clear
rename lopenr lopenr_mor
save "$processed_data/Marital_status_mom.dta", replace

	* 4.2. Father: same file keyed on the father id
use "$processed_data/Marital_status_wide.dta", clear
rename lopenr lopenr_far
save "$processed_data/Marital_status_dad.dta", replace

* Remove the intermediate file
erase "$processed_data\Marital_status_wide.dta"

}
*********************************************  
*** 5.CLEAN PARENTAL EDUCATION
********************************************* 
{
* Education variable BU by year, reshaped to one row per person
use "S:\Project\DemoSos2\data\Utd\BU92_14.dta", clear
drop igang

reshape wide BU, i(lopenr) j(aar)
save "$processed_data/Educ_wide.dta", replace

* Mother: same file keyed on the mother id
use "$processed_data/Educ_wide.dta", clear
rename lopenr lopenr_mor
save "$processed_data/Educ_mom.dta", replace

* Father: same file keyed on the father id
use "$processed_data/Educ_wide.dta", clear
rename lopenr lopenr_far
save "$processed_data/Educ_dad.dta", replace

* Remove the intermediate file
erase "$processed_data/Educ_wide.dta"
}
*********************************************  
*** 6.CLEAN PARENTAL LEAVE 
********************************************* 
{
* Prepare data for parents leave days, by month and year
* Step 1: look up the birth date (fdato) of the sample child through mother id and birth year
use "$processed_data\sample_healthshock.dta", clear
keep lopenr_mor child_b_year
rename (lopenr_mor child_b_year) (lopenr aargang)
merge 1:m lopenr aargang using "S:/Project/DemoSos2/data/FDtrygd/demogr/tab_fods.dta"
keep if _merge == 3
keep lopenr aargang fdato
rename (lopenr aargang) (lopenr_mor child_b_year)
duplicates drop

* Mothers left with more than one birth date are removed (60 mothers have two births in the same year)
bysort lopenr_mor: drop if _N > 1
save "$processed_data\temp_fdato_barn.dta", replace

* Step 2: keep the sample children for whom a birth date was found
use "$processed_data\sample_healthshock.dta", clear
merge 1:1 lopenr_mor child_b_year using "$processed_data\temp_fdato_barn.dta"
keep if _merge == 3
drop _merge
save "$processed_data\sample_healthshock_parental_leave.dta", replace
erase "$processed_data\temp_fdato_barn.dta"

* ID file parents: distinct mother ids, distinct father ids, then both lists combined
use "$processed_data\sample_healthshock_parental_leave.dta", clear
keep lopenr_mor
rename lopenr_mor lopenr
duplicates drop
save "$processed_data\temp_id_mom.dta", replace

use "$processed_data\sample_healthshock_parental_leave.dta", clear
keep lopenr_far
rename lopenr_far lopenr
duplicates drop
save "$processed_data\temp_id_dad.dta", replace

use "$processed_data\temp_id_mom.dta", clear
append using "$processed_data\temp_id_dad.dta"
duplicates drop
save "$processed_data\temp_id_parent.dta", replace

erase "$processed_data\temp_id_mom.dta"
erase "$processed_data\temp_id_dad.dta"

* Leave Days FDtrygd
* Clean the leave spell file for the sample parents: one spell per person and start date,
* with a start date (fomdate) and an end date (tomdate). The locals Start and Stopp that are
* defined here are also used by the code further down.
use "S:\Project\DemoSos2\data\FDtrygd\trygd\f_fp.dta", clear
merge m:1 lopenr using "$processed_data\temp_id_parent.dta"
keep if _merge == 3
drop _merge

* Drop left censored and any changes
* Records are dropped when the characters from position 2 onward of fpkode are "2" or "3"
tab fpkode
gen tmp1 = string(fpkode)
gen tmp2 = substr(tmp1, 2,.)
drop if (tmp2=="2"|tmp2=="3")
drop tmp*
* Inspect how many records share the same person and start date
bys lopenr fptilg: gen n = _n
tab n
drop n

ren fptilg fomdate
ren fpavg tomdate

* Inspect the range of start and end years
gen tmp1 = year(fomdate)
gen tmp2 = year(tomdate)
sum tmp1 tmp2
drop tmp*

* First and last year of the counting window
local Start = 1990
local Stopp = 2015

local StDato = d(1jan`Start')
local SlDato = d(1jan`Stopp')
  
* Drop all records that end before Jan 1st 1990 or start after Jan 1st 2015
drop if tomdate < `StDato' | fomdate > `SlDato'
  
* For those who have not ended their record, we set the end date to Dec 31st in end yr
sort lopenr fomdate
replace tomdate = d(31dec`Stopp') if tomdate == . 

* Keep min tomdate if more than one
bys lopenr fomdate: egen tmp1 = min(tomdate)
format tmp1 %td
keep if tmp1 == tomdate
* Inspect whether several records per person and start date remain
bys lopenr fomdate: gen n = _n
sum n
drop n

* Drop those with start date equal to end date
drop if fomdate == tomdate

save "$processed_data\temp_FDfpleave_all.dta", replace
* Keep only variables needed
keep lopenr fomdate tomdate 
sort lopenr fomdate tomdate 
save "$processed_data\temp_FDfpleave.dta", replace

** Count number of days per month (year)
* Step 1: for every spell and calendar month, the number of days of the spell that fall in
* that month. The overlap of the spell with the month is
*     min(end of spell, last day of month) - max(start of spell, first day of month) + 1
* and is left missing when there is no overlap. Month boundaries are computed from the
* calendar, so February has 29 days in leap years, and a spell that ends on the first day
* of a month gets one day in that month.
forvalues year = `Start' / `Stopp' {
  use "$processed_data\temp_FDfpleave.dta", clear
  forvalues m = 1/12 {
    local st = dofm(ym(`year', `m'))
    local sl = dofm(ym(`year', `m') + 1) - 1
    gen dager_`m'_`year' = min(tomdate, `sl') - max(fomdate, `st') + 1 if !missing(fomdate, tomdate)
    replace dager_`m'_`year' = . if dager_`m'_`year' < 1
  }

  keep lopenr dager_*

  save "$processed_data\temp_FDfpleave_`year'.dta", replace
}

* Step 2: sum the days over all spells of a person, cap the sum at the length of the month
* (overlapping spells could exceed it), and store one row per person and month
forvalues year = `Start'/`Stopp' {
  use "$processed_data\temp_FDfpleave_`year'.dta", clear
  forvalues m = 1/12 {
    local ndays = dofm(ym(`year', `m') + 1) - dofm(ym(`year', `m'))
    bysort lopenr: egen Dager`m' = total(dager_`m'_`year')
    replace Dager`m' = `ndays' if Dager`m' > `ndays'
  }
  drop dager*

  bysort lopenr: keep if _n == 1
  reshape long Dager, i(lopenr) j(month)
  ren Dager days
  gen yr=`year'
  save "$processed_data\temp_FDfpleave_`year'.dta", replace
}

* Combine the yearly person-month files into one file.
* The append loop adds the years Start to Stopp minus 1 to the data currently in memory.
* NOTE(review): this relies on the data of the last year (Stopp) still being in memory
* from the preceding loop - confirm when running this part on its own.
local Stopp1 = `Stopp' - 1
  forvalues year = `Start'/`Stopp1' {
  append using "$processed_data\temp_FDfpleave_`year'.dta"
}

* Remove the yearly files; cap keeps the run going when a file does not exist
forvalues year = `Start'/`Stopp' {
  cap erase "$processed_data\temp_FDfpleave_`year'.dta"
}

ren days Ldays
la var Ldays "Leave days"

order lopenr month yr Ldays
compress
save "$processed_data\0_FD_fpleave_month.dta", replace

** Merge in leave days for each child
** No of leave days taken in the months after birth
** NOTE(review): the original heading said within 24 months, the code below keeps rows up to X = 36 - confirm

* MOTHERS
* Birth date of the sample child, with year and month of birth as merge keys
use "$processed_data\sample_healthshock_parental_leave.dta", clear
ren fdato Bdate
keep lopenr lopenr_mor Bdate

gen Year = year(Bdate)
gen Month = month(Bdate)
save "$processed_data\temp_bdate.dta", replace

* Monthly leave days keyed on the mother id
use "$processed_data\0_FD_fpleave_month.dta", clear
ren lopenr lopenr_mor
ren yr Year
ren month Month
save "$processed_data\0_LD_month_mom.dta", replace

* Mark the birth month inside the monthly leave rows of the mother
use "$processed_data\0_LD_month_mom.dta",clear
merge m:1 lopenr_mor Year Month using "$processed_data\temp_bdate.dta"
* Drop persons for whom no row comes from the birth-date file (highest _merge is 1)
bys lopenr_mor: egen tmp1 = max(_merge)
drop if tmp1 == 1

* X is 0 in the birth month and increases by one for each following row of the same mother;
* rows before the birth month keep X missing and are dropped
sort lopenr_mor Year Month
gen X = 0 if Bdate!= .
replace X = X[_n-1]+1 if X==. & lopenr_mor == lopenr_mor[_n-1]
drop if X>36 & X!=.
drop if X==.
keep lopenr* Bdate X Month Year Ldays
* Copy child id and birth date from the birth-month row to the following rows
* (carryforward is a user-written command)
* NOTE(review): presumably relies on the rows still being in Year Month order within mother - confirm
bys lopenr_mor: carryforward lopenr, replace
bys lopenr_mor: carryforward Bdate, replace

save "$processed_data\temp_Ldaysmom.dta", replace

* FATHERS
* Same steps as for mothers, keyed on the father id; children without a father id are left out
use "$processed_data\sample_healthshock_parental_leave.dta", clear
ren fdato Bdate
keep lopenr lopenr_far Bdate

drop if lopenr_far == ""

gen Year = year(Bdate)
gen Month = month(Bdate)
save "$processed_data\temp_bdate_dad.dta", replace

* Monthly leave days keyed on the father id
use "$processed_data\0_FD_fpleave_month.dta", clear
ren lopenr lopenr_far
ren yr Year
ren month Month
save "$processed_data\0_LD_month_dad.dta", replace

* Mark the birth month inside the monthly leave rows of the father
use "$processed_data\0_LD_month_dad.dta",clear
merge m:1 lopenr_far Year Month using "$processed_data\temp_bdate_dad.dta"
* Drop persons for whom no row comes from the birth-date file (highest _merge is 1)
bys lopenr_far: egen tmp1 = max(_merge)
drop if tmp1 == 1

* X is 0 in the birth month and increases by one for each following row of the same father
sort lopenr_far Year Month
gen X = 0 if Bdate!= .
replace X = X[_n-1]+1 if X==. & lopenr_far == lopenr_far[_n-1]
drop if X>36 & X!=.
drop if X==.
keep lopenr* Bdate X Month Year Ldays
ren Ldays Ldays_dad

* Copy child id and birth date from the birth-month row to the following rows
bys lopenr_far: carryforward lopenr, replace
bys lopenr_far: carryforward Bdate, replace


save "$processed_data\temp_Ldaysdad.dta", replace

* Combine mother and father rows per child and month; only rows present for both are kept
use "$processed_data\temp_Ldaysmom", clear
merge m:1 lopenr Bdate Month Year X using "$processed_data\temp_Ldaysdad.dta"
keep if _merge == 3
drop _merge 
* Family total; missing when either parent value is missing
gen Ldays_fam = Ldays + Ldays_dad
save "$processed_data\temp_Ldays_mom_dad.dta", replace


** Find births of other children of the parents
* Adds two indicators to the monthly leave file: new_child_mother and new_child_father,
* equal to 1 in a month with a birth whose date differs from the birth date of the sample child.

* Mothers 
* All births in the birth file, keyed on person id, year and month
use "S:/Project/DemoSos2/data/FDtrygd/demogr/tab_fods.dta", clear
keep lopenr fdato
drop if lopenr == ""
ren fdato Bdate 
gen Year = year(Bdate)
gen Month = month(Bdate)
ren lopenr lopenr_mor 
ren Bdate Bdate_otherchildren
duplicates drop
codebook lopenr_mor
save "$processed_data/temp_births_mom.dta", replace

use "$processed_data/temp_Ldays_mom_dad.dta", clear
merge 1:m lopenr_mor Year Month using "$processed_data/temp_births_mom.dta"
drop if _merge == 2
drop _merge 

* The birth of the sample child itself does not count as another child
replace Bdate_otherchildren = . if Bdate_otherchildren == Bdate
g new_child_mother = (Bdate_otherchildren != .)
tab new_child_mother

* NOTE(review): months that matched two birth rows are removed completely, not reduced
* to one row, and months with more than two rows are kept - confirm intent
bys lopenr_mor Year Month: gen N = _N
tab N
drop if N == 2
drop N
save "$processed_data/temp_Ldays_mom_dad.dta", replace 

* Fathers 
* NOTE(review): unlike for mothers, exact duplicates are not removed from the birth list here
use "S:/Project/DemoSos2/data/FDtrygd/demogr/tab_fods.dta", clear
keep lopenr fdato
drop if lopenr == ""
ren fdato Bdate 
gen Year = year(Bdate)
gen Month = month(Bdate)
ren lopenr lopenr_far 
ren Bdate Bdate_otherchildren
save "$processed_data/temp_births_dad.dta", replace

* NOTE(review): the 1:m merge requires father id, year and month to identify a single row
* of the leave file; a father with two sample children could break this - confirm
use "$processed_data/temp_Ldays_mom_dad.dta", clear
merge 1:m lopenr_far Year Month using "$processed_data/temp_births_dad.dta"
drop if _merge == 2
drop _merge 

* The birth of the sample child itself does not count as another child
replace Bdate_otherchildren = . if Bdate_otherchildren == Bdate
g new_child_father = (Bdate_otherchildren != .)
tab new_child_father
drop Bdate_otherchildren

duplicates drop
sort lopenr Year Month
save "$processed_data/temp_Ldays_mom_dad.dta", replace 


* Total leave days per parent for the sample child, and the share taken by each parent
use "$processed_data/temp_Ldays_mom_dad.dta", clear

* Month (year-month value) in which the mother has a new child
bys lopenr_mor: gen date_otherchild = ym(Year,Month) if new_child_mother == 1
format %tm date_otherchild

* Latest such month per mother
bys lopenr_mor: egen max_date_otherchild = max(date_otherchild)
format %tm max_date_otherchild

* Month of the current leave row
gen date_leave = ym(Year,Month) 
format %tm date_leave

* Months between the leave row and the latest new-child month (negative means before it)
gen diff_leave_child2 = date_leave - max_date_otherchild

* Total leave days of the mother; the second total only uses rows at least two months
* before the latest new-child month and replaces the first when such a month exists
bys lopenr_mor: egen total_Ldays_mom=total(Ldays) if new_child_mother == 0 
bys lopenr_mor: egen total_Ldays_mom_2 =total(Ldays) if diff_leave_child2 < -1

replace total_Ldays_mom = total_Ldays_mom_2 if diff_leave_child2 != .

* Same construction for the father: month in which the father has a new child
bys lopenr_far: gen date_otherchild_dad = ym(Year,Month) if new_child_father == 1
format %tm date_otherchild_dad

bys lopenr_far: egen max_date_otherchild_dad = max(date_otherchild_dad)
format %tm max_date_otherchild_dad

gen diff_leave_child2_dad = date_leave - max_date_otherchild_dad

bys lopenr_far: egen total_Ldays_dad=total(Ldays_dad) if new_child_father == 0 
bys lopenr_far: egen total_Ldays_dad_2 =total(Ldays_dad) if diff_leave_child2_dad < -1

replace total_Ldays_dad = total_Ldays_dad_2 if diff_leave_child2_dad != .

* Reduce to one row per mother
* NOTE(review): the condition has a -2 case for the mother but none for the father, and the
* row kept by duplicates drop depends on the current sort order - confirm both
keep if diff_leave_child2 == . | diff_leave_child2_dad == . | diff_leave_child2 == -2 
duplicates drop lopenr_mor, force

keep lopenr* total_Ldays_mom total_Ldays_dad

* Family total and parental shares; missing when either parent total is missing
gen total_Ldays_fam = total_Ldays_mom + total_Ldays_dad
gen share_mom = total_Ldays_mom / total_Ldays_fam
gen share_dad = total_Ldays_dad / total_Ldays_fam

save "$processed_data/Ldays_parents.dta", replace
}
*********************************************
*** 7. CREATE MATCHED SAMPLE FOR MUTUAL SHOCKS
*********************************************
{
* Use the sample of child health shocks and flag mothers who have a record in the NPR file
* dated close to the admission of the child.

** Mother
* Two windows are produced with the same steps:
*   month: at most 30 days before or after the child admission -> mutual_shock_month.dta
*   week : at most  7 days before or after the child admission -> mutual_shock_week.dta
foreach window in month week {
	local ndays = cond("`window'" == "month", 30, 7)

	use "$processed_data\sample_healthshock.dta", clear
	rename lopenr lopenr_child
	rename first_inphospital InnDato_child

	* Look up the mother in the NPR file
	rename lopenr_mor lopenr
	merge 1:m lopenr using "S:\Project\DemoSos2\data\NPR\som2008_2014.dta"
	keep if _merge == 3

	* Date of the record of the mother
	rename innUke Week
	rename aar Year
	do GenNPRdate.do
	generate InnDato = mdy(M,D,Year)
	format InnDato %td
	label variable InnDato "Date hosptial admission"

	* Days between the record of the mother and the admission of the child
	generate days_difference = InnDato - InnDato_child
	generate mutual_shock = 1 if days_difference <= `ndays' & days_difference >= -`ndays'

	* One row per mother with a record inside the window
	keep if mutual_shock == 1
	keep lopenr mutual_shock
	rename lopenr lopenr_mor
	duplicates drop lopenr_mor, force
	save "$processed_data\mutual_shock_`window'.dta", replace
}


* Father
* A father can appear more than once (divorced and then children in another family).
* The rows of each father are numbered and the sample is split into one file per number,
* so that each file holds a father at most once.
clear

use "$processed_data\sample_healthshock.dta", clear

rename lopenr lopenr_child
rename first_inphospital InnDato_child
rename lopenr_far lopenr

* Row number within father; rows without a father id are removed afterwards
bysort lopenr: generate tag = _n
drop if lopenr == ""

* Files for the first, second and third row of a father
forvalues i = 1/3 {
	preserve
	keep if tag == `i'
	save "$processed_data\subsample_f`i'.dta", replace
	restore
}


* hospitalization
** +- 1 month
* For each father subsample: attach the father's NPR records, flag records
* within 30 days of the child's date and store the flagged father-mother pairs.

forvalues k = 1/3 {
	use "$processed_data\subsample_f`k'.dta", clear
	merge 1:m lopenr using "S:\Project\DemoSos2\data\NPR\som2008_2014.dta", keep(match)

	* Father's admission date
	* (GenNPRdate.do is expected to create M and D from Week and Year)
	rename innUke Week
	rename aar Year
	do GenNPRdate.do
	generate InnDato = mdy(M, D, Year)
	format InnDato %td
	label variable InnDato "Date hosptial admission"

	* Signed distance in days between the father's and the child's dates
	generate days_difference = InnDato - InnDato_child
	generate mutual_shock_f = 1 if inrange(days_difference, -30, 30)

	drop if missing(mutual_shock_f)

	* Nothing is written when a subsample has no flagged father
	* (the original run had no observations for subsample 3)
	if _N > 0 {
		keep lopenr lopenr_mor mutual_shock_f
		rename lopenr lopenr_far
		duplicates drop lopenr_far lopenr_mor, force
		save "$processed_data\mutual_shock_month_f`k'.dta", replace
	}
}

** +- 1 week
* For each father subsample: attach the father's NPR records, flag records
* within 7 days of the child's date and store the flagged father-mother pairs.

forvalues k = 1/3 {
	use "$processed_data\subsample_f`k'.dta", clear
	merge 1:m lopenr using "S:\Project\DemoSos2\data\NPR\som2008_2014.dta", keep(match)

	* Father's admission date
	* (GenNPRdate.do is expected to create M and D from Week and Year)
	rename innUke Week
	rename aar Year
	do GenNPRdate.do
	generate InnDato = mdy(M, D, Year)
	format InnDato %td
	label variable InnDato "Date hosptial admission"

	* Signed distance in days between the father's and the child's dates
	generate days_difference = InnDato - InnDato_child
	generate mutual_shock_f = 1 if inrange(days_difference, -7, 7)

	drop if missing(mutual_shock_f)

	* Nothing is written when a subsample has no flagged father
	* (the original run had no observations for subsample 3)
	if _N > 0 {
		keep lopenr lopenr_mor mutual_shock_f
		rename lopenr lopenr_far
		duplicates drop lopenr_far lopenr_mor, force
		save "$processed_data\mutual_shock_week_f`k'.dta", replace
	}
}


/* 2. CLEAN FINAL DATA WITH A VARIABLE IF MOTHER OR FATHER HAD A MUTUAL SHOCK */

* Mothers *
** +- 1 month
* Attach the mother flag (mutual_shock) to the health shock sample
use "$processed_data\sample_healthshock.dta", clear
merge 1:1 lopenr_mor using "$processed_data\mutual_shock_month.dta", nogenerate
save "$processed_data\matched_mutual_month.dta", replace

* Fathers *
** +- 1 month
* Attach the father flag (mutual_shock_f). The update option lets the second
* subsample fill in rows that are still missing after the first one.
* Only subsamples 1 and 2 are merged (subsample 3 produced no file).
use "$processed_data\matched_mutual_month.dta", clear
forvalues k = 1/2 {
	merge 1:1 lopenr_far lopenr_mor using "$processed_data\mutual_shock_month_f`k'.dta", update nogenerate
}

save "$processed_data\matched_mutual_month.dta", replace

* Keep the sample without a parental shock: drop every child whose mother or
* father is flagged
drop if mutual_shock == 1 | mutual_shock_f == 1

save "$processed_data\matched_mutual_month.dta", replace

* Mothers *
** +- 1 week
* Attach the mother flag (mutual_shock) to the health shock sample
use "$processed_data\sample_healthshock.dta", clear
merge 1:1 lopenr_mor using "$processed_data\mutual_shock_week.dta", nogenerate
save "$processed_data\matched_mutual_week.dta", replace

* Fathers *
** +- 1 week
* Attach the father flag (mutual_shock_f). The update option lets the second
* subsample fill in rows that are still missing after the first one.
* Only subsamples 1 and 2 are merged (subsample 3 produced no file).
use "$processed_data\matched_mutual_week.dta", clear
forvalues k = 1/2 {
	merge 1:1 lopenr_far lopenr_mor using "$processed_data\mutual_shock_week_f`k'.dta", update nogenerate
}

save "$processed_data\matched_mutual_week.dta", replace

* Keep the sample without a parental shock: drop every child whose mother or
* father is flagged
drop if mutual_shock == 1 | mutual_shock_f == 1

save "$processed_data\matched_mutual_week.dta", replace

	
}
*********************************************
*** 8. CREATE SAMPLE OF ALL CHILD HOSPITAL STAYS
*********************************************
{
* Load the raw NPR records and discard rows without a person ID
use "S:\Project\DemoSos2\data\NPR\som2008_2014.dta", clear
drop if missing(lopenr)

* Restrict to ages 0-18 (rows with missing age are removed as well, because
* missing compares as larger than any number)
drop if alder > 18
tabulate alder
codebook lopenr

rename innUke Week
rename aar Year

* Build the date of hospital admission
* (GenNPRdate.do is expected to create M and D from Week and Year)
do GenNPRdate.do
generate InnDato = mdy(M, D, Year)
format InnDato %td
label variable InnDato "Date hosptial admission"
generate Month = month(InnDato)
codebook lopenr
* Records for which no valid date could be built are discarded
drop if missing(InnDato)
drop D M
codebook lopenr

* Reduce to the analysis variables and remove exact duplicate rows
keep lopenr Year Week alder kjonn Behandlingsniva3 innmateHast hoved1 liggetid InnDato Month
order lopenr InnDato Year Month Week alder kjonn Behandlingsniva3 innmateHast hoved1 liggetid
duplicates drop

* Clean data
* The same admission can be spread over several rows:
	** give every row of a person-date the summed number of nights
bysort lopenr InnDato: egen total_nights = total(liggetid)
by lopenr InnDato: replace liggetid = total_nights if _N > 1
drop total_nights
duplicates drop

	** Keep one row (diagnosis) per person, date, care level and urgency
bysort lopenr InnDato Behandlingsniva3 innmateHast: keep if _n == 1

	** Drop person-dates that have both an acute (innmateHast == 1) and a
	** planned (innmateHast == 2) record
bysort lopenr InnDato: egen has_acute = max(innmateHast == 1)
bysort lopenr InnDato: egen has_planned = max(innmateHast == 2)
drop if has_acute == 1 & has_planned == 1
drop has_acute has_planned

* Some have an acute admission and an acute outpatient consultation on the
* same date: on dates with several rows keep only the acute admission
* (Behandlingsniva3 == 1 & innmateHast == 1); single-row dates are kept as is
bysort lopenr InnDato: keep if _N == 1 | (Behandlingsniva3 == 1 & innmateHast == 1)

* Identify acute admissions with overnight stay (health shocks) and keep only
* individuals who have at least one
* NOTE(review): a missing liggetid also satisfies liggetid>0 in Stata - confirm
* that liggetid is never missing at this point
gen healthshock=(innmateHast == 1 & Behandlingsniva3==1 & liggetid>0) 
bys lopenr: egen HS=max(healthshock) 
codebook lopenr if HS==0  
drop if HS==0 
drop HS

* Generate variable for the first health shock:
* hs1 numbers each person's health shocks by date; it is missing on other rows
bys lopenr healthshock (InnDato): gen hs1=_n 
replace hs1=. if healthshock!=1
tab hs1, m

* Drop those who have any hospital record in the 365 days before the first HS:
* with rows sorted by person and date, the first health shock is flagged when
* the same person's previous row is at most 365 days earlier
sort lopenr InnDato
gen NPRsPre = (lopenr == lopenr[_n-1] & InnDato - InnDato[_n-1] <= 365 & hs1== 1)
tab NPRsPre
bys lopenr: egen PRE=max(NPRsPre) 
codebook lopenr if PRE == 0 
drop if PRE==1
codebook lopenr 

* Keep only the first health shock per individual
keep if hs1 == 1  
ren Year Year_HS
tab Year_HS

* Keep those with the first admission between ages 6-18 
tab alder
keep if alder > 5
codebook lopenr  
tab alder  
sum liggetid

* Drop all diagnoses related to birth and pregnancy.
* The filter uses the first letter of the main diagnosis directly instead of
* the integer codes created by encode: those codes depend on which first
* characters happen to occur in the data, so 15/16 are not guaranteed to be
* O and P.
* Assumes hoved1 holds ICD-10 codes, where O and P are the pregnancy/childbirth
* and perinatal chapters - TODO confirm
gen diagnose = (substr(hoved1, 1, 1))
encode diagnose, gen(Diagnose)
drop if inlist(diagnose, "O", "P")

* Keep relevant variables and rename
keep lopenr InnDato Year_HS alder kjonn liggetid Diagnose hoved1 
ren InnDato first_inphospital

* Year of first hospital admission
g yearinp=year(first_inphospital)

save "$processed_data\HS_allchildren.dta", replace

** Merge the children who experienced a health shock with the sample of all
** children born between 1990 and 2008
use "S:\Project\DemoSos2\data\Befolkn\Demogr\fastevar", clear
keep if inrange(Fodtaar, 1990, 2008)
codebook lopenr
tabulate Fodtaar

save "$processed_data\sample_allchildren.dta", replace

* Children without a registered mother are excluded
drop if lopenr_mor == ""
keep lopenr* kjoenn invkat Fodtaar fodtmnd

* Sex: recode 1/2 to 0 = male, 1 = female
rename kjoenn sex
tabulate sex
recode sex (1 = 0) (2 = 1)
label define sex_ 0 "Male" 1 "Female"
label values sex sex_

* Male indicator
generate male = (sex == 0)
label define male_ 0 "Female" 1 "Male"
label values male male_
tabulate male

* Indicator for immigration category "A"
generate NORborn = (invkat == "A")
drop invkat
codebook lopenr
save "$processed_data\sample_allchildren.dta", replace

* Merge sample of all children with health shock sample;
* health shock rows without a match in the birth cohorts are discarded
use "$processed_data\sample_allchildren.dta", clear
codebook lopenr
merge 1:1 lopenr using "$processed_data\HS_allchildren.dta"
drop if _merge == 2
codebook lopenr
tabulate Fodtaar

* inphospital = 1 when the child has a health shock year, 0 otherwise
generate inphospital = (Year_HS != .)
tabulate inphospital
keep lopenr* male Fodtaar NORborn Year_HS inphospital
save "$processed_data\sample_allchildren.dta", replace

* Background variables mother
* One row per mother and child birth year (Byr)
use lopenr_mor Fodtaar using "$processed_data\sample_allchildren.dta", clear
tabulate Fodtaar
rename lopenr_mor lopenr
rename Fodtaar Byr
duplicates drop
codebook lopenr

* Look the mother up in the population file; unmatched rows are discarded
merge m:1 lopenr using "S:\Project\DemoSos2\data\Befolkn\Demogr\fastevar", keep(match) nogenerate

* Immigrant status and birth year
* (Fodtaar now comes from the mother's own record)
keep lopenr Byr invkat Fodtaar
generate NORborn_mom = (invkat == "A")
drop invkat
rename Fodtaar mother_b_year
save "$processed_data\temp_mom.dta", replace

* Marital status
* Marital status is looked up for the calendar year of the child's birth
use "$processed_data\temp_mom.dta", clear
generate aar = Byr
merge 1:m lopenr aar using "S:\Project\DemoSos2\data\Befolkn\Demogr\sivilstand.dta", keep(master match) nogenerate
sort lopenr aar
codebook lopenr

* aar equals Byr on every remaining row (it was the merge key), so the
* indicators depend on sivilstand only; they stay missing when it is missing
generate married_mom = (sivilstand == 2) if sivilstand != .
generate unmarried_mom = (sivilstand == 1) if sivilstand != .

drop aar sivilstand
save "$processed_data\temp_mom.dta", replace

* Years of education 
** Panel of education mom between 1990-2008
* The panel is built per mother AND child birth year. Keeping a single row per
* mother would retain one arbitrary birth year only, so that children born in
* the mother's other birth years would end up with missing education.
use "$processed_data\temp_mom.dta", clear
keep lopenr Byr
duplicates drop

* One placeholder column per calendar year, reshaped to one row per year
forval v = 1990/2008 {
	gen y`v'=1
	}

reshape long y, i(lopenr Byr) j(aar)
drop y
save "$processed_data\temp_mor_år_panel.dta", replace

* Education records restricted to the same years
use "S:\Project\DemoSos2\data\Utd\BU70_91.dta", clear
append using "S:\Project\DemoSos2\data\Utd\BU92_14.dta"

keep if aar >= 1990 & aar<=2008
sort lopenr aar
save "$processed_data\educ_temp.dta", replace

* The same mother-year can now occur once per birth year in the panel, hence
* m:1 (the education file must still be unique on lopenr aar)
use "$processed_data\temp_mor_år_panel.dta", clear
merge m:1 lopenr aar using "$processed_data\educ_temp.dta"
drop if _merge == 2 
drop _merge 

* make a var that is the education level of the mothers in birth year
keep if Byr==aar
* The education level is the first character of BU
g educ_=substr(BU,1,1)
destring educ_, replace

* One indicator per education level 0-9
forvalues y=0(1)9 {
g educ`y'=(educ_==`y')
}

label var educ0 "No education mother" 
label var educ1 "Primary school mother" 
label var educ2 "Lower secondary mother"
label var educ3 "Upper secondary, basic educ. level mother"
label var educ4 "Upper secondary, final year mother"
label var educ5 "Post secondary non-tertiary mother"
label var educ6 "Bachelor's or equivalent level mother"
label var educ7 "Master's or equivalent level mother"
label var educ8 "Doctoral or equivalent level mother"
label var educ9 "Unspecified"

drop aar BU igang educ_
save "$processed_data\educ_temp.dta", replace

* Add the education indicators to the mother file (one row per mother and
* birth year on both sides)
use "$processed_data\temp_mom.dta", clear
codebook lopenr
merge 1:1 lopenr Byr using "$processed_data\educ_temp.dta"
drop _merge 
ren lopenr lopenr_mor
save "$processed_data\temp_mom.dta", replace

* Attach the mother variables to every child via mother ID and birth year
use "$processed_data\sample_allchildren.dta", clear
ren Fodtaar Byr 
merge m:1 lopenr_mor Byr using "$processed_data\temp_mom.dta"
drop if _merge == 2
drop _merge 

save "$processed_data\sample_allchildren.dta", replace
}
















